main : main title
pseudoimage <- image(raw_data, transfo=log2)
MA_plot <- MAplot(raw_data)
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'
### QA: Statistics
# BiocManager::install("AnnotationDbi")
# BiocManager::install("Biobase")
IQRray
# The IQRray statistic is obtained by ranking all the probe intensities from
# a given array and by computing the average rank for each probe set. The
# interquartile range (IQR) of the probe sets average ranks serves then as
# quality score.
#Example of a low score: 122783.6
# high score: 226124.6
library(methods)
library(AnnotationDbi)
library(Biobase)
#data - Affybatch object obtained after reading in celfiles into R with function ReadAffy from package affy
#obtaining intensity values for perfect match (pm) probes
pm_data<-pm(raw_data)
#ranking probe intensities for every array
rank_data<-apply(pm_data,2,rank)
#obtaining names of probeset for every probe
probeNames<-probeNames(raw_data)
#function computing IQR of mean probe ranks in probesets
get_IQR<-function(rank_data,probeNames){round(IQR(sapply(split(rank_data,probeNames),mean)),digits=2)}
#computing arIQR score
IQRray_score<-apply(rank_data,2,get_IQR,probeNames=probeNames)
Normalize Data
norm <- normalize(raw_data)
## Normalizing... OK
raw_table <- exprs(raw_data)
# RMA is most commonly used method for preprocessing normalization of Affymetrix microarray data. There
# are three steps in the RMA algorithm: Convolution Background Correction, Quantile Normalization, and
# Summarization using Median Polish.
normRMA <- rma(raw_data)
## Background correcting
## Normalizing
## Calculating Expression
norm_probeset_table <- exprs(normRMA)
#Human Exon array probeset to gene-level expression (biostars.org, https://www.biostars.org/p/271379/)
#Probe Level normalization
backgroundCorrection <- preprocessCore::rma.background.correct(raw_table)
probelevel_table <- preprocessCore::normalize.quantiles(backgroundCorrection, keep.names=TRUE)
QA Visualization for Normalized Data
MAplot_norm <- MAplot(normRMA)
densityPlot_norm <- hist(normRMA)
## PCA plot
pca <- prcomp(t(norm_probeset_table))
plot(pca$x[,1:2])
#how do you make a legend?
#legend('topright', col=1:6, legend = paste("Samples", 1:6), pch=20, bty='n', cex=.75)
DGE Analysis
## Getting Factor Values
fv <- data.frame(csv_file["Factor Value"])
all_factor_values <- fv[,1]
factor_values <- unique(all_factor_values)
all.factor.values <- make.names(all_factor_values)
factor.values <- make.names(factor_values)
## Creating Design
ph = raw_data@phenoData
ph@data[ ,2] = c(all.factor.values)
colnames(ph@data)[2]="source"
groups = ph@data$source
f = factor(groups,levels = factor.values)
design = model.matrix(~ 0 + f)
colnames(design) <- factor.values
#Run for Data Sets with 2 Factor Values
fit = limma::lmFit(norm_probeset_table,design)
#Run for Data Sets with 3 Factor Values
fit = limma::lmFit(normRMA,design)
## Creating Constrast Matrix
fit.groups <- colnames(fit$design) [which(fit$assign == 1)]
fit.index <- which(levels(levels) %in% fit.groups)
fit.group.names <- gsub(" ", "_", sub(",", "_", unique(csv_file$'Factor Value')))
combos <- combn(fit.groups, 2)
combos.names <- combn(fit.group.names, 2)
contrasts <- c(paste(combos[1,], combos[2,], sep = "-", paste(combos[2,], combos[1,], sep="-")))
contrast.names <- c(paste(combos.names[1,], combos.names[2,], sep = "v"), paste(combos.names[2,],
combos.names[1,], sep = "v"))
cont.matrix <- limma::makeContrasts(contrasts = contrasts, levels=design)
contrast.fit <- limma::contrasts.fit(fit, cont.matrix)
contrast.fit.eb <- limma::eBayes(contrast.fit)
log_fold_change <- contrast.fit.eb$coefficients[1:10,]
p_value <- contrast.fit.eb$p.value[1:10,]
adjusted_pvalue <- p.adjust(p_value,method="BH")
#results <- limma::decideTests(contrast.fit.eb, method = "separate", adjust.method = "BH", p.value = 0.05, lfc = 0.5)
#summary(limma::decideTests(contrast.fit.eb[,-1]))
#Error in h(simpleError(msg, call)) :
# error in evaluating the argument 'object' in selecting a method for function 'summary':
# subscript out of bounds
Gene Mapping
library(biomaRt)
library(dplyr)
listEnsembl()
## biomart version
## 1 genes Ensembl Genes 107
## 2 mouse_strains Mouse strains 107
## 3 snps Ensembl Variation 107
## 4 regulation Ensembl Regulation 107
ensembl <- useEnsembl(biomart = "genes")
searchDatasets(mart = ensembl, pattern = "Mouse")
## dataset description version
## 107 mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) Mmur_3.0
## 108 mmusculus_gene_ensembl Mouse genes (GRCm39) GRCm39
ensembl <- useDataset(dataset = "mmusculus_gene_ensembl", mart = ensembl)
searchAttributes(mart = ensembl, pattern = "affy_")
## name description page
## 95 affy_mg_u74a AFFY MG U74A probe feature_page
## 96 affy_mg_u74av2 AFFY MG U74Av2 probe feature_page
## 97 affy_mg_u74b AFFY MG U74B probe feature_page
## 98 affy_mg_u74bv2 AFFY MG U74Bv2 probe feature_page
## 99 affy_mg_u74c AFFY MG U74C probe feature_page
## 100 affy_mg_u74cv2 AFFY MG U74Cv2 probe feature_page
## 101 affy_moe430a AFFY MOE430A probe feature_page
## 102 affy_moe430b AFFY MOE430B probe feature_page
## 103 affy_moex_1_0_st_v1 AFFY MoEx 1 0 st v1 probe feature_page
## 104 affy_mogene_1_0_st_v1 AFFY MoGene 1 0 st v1 probe feature_page
## 105 affy_mogene_2_1_st_v1 AFFY MoGene 2 1 st v1 probe feature_page
## 106 affy_mouse430a_2 AFFY Mouse430A 2 probe feature_page
## 107 affy_mouse430_2 AFFY Mouse430 2 probe feature_page
## 108 affy_mu11ksuba AFFY Mu11KsubA probe feature_page
## 109 affy_mu11ksubb AFFY Mu11KsubB probe feature_page
my_affy_ids <- c(head(rownames(norm_probeset_table), 2000))
results <- getBM(
attributes = c(
"affy_mogene_1_0_st_v1",
"ensembl_gene_id",
"uniprot_gn_symbol",
"go_id"
), # Columns we want, these were found using searchAttributes
filters = "affy_mogene_1_0_st_v1", # Filters (one or more) that should be used in the query
values = my_affy_ids, # Query IDs
mart = ensembl)
Verify Normalization
print("Verify Normalization step")
## [1] "Verify Normalization step"
DGE Analysis
print("DGE Analysis step")
## [1] "DGE Analysis step"
Gene Annotations
print("Gene Annotations step")
## [1] "Gene Annotations step"
Code I will not be using but do not want to delete
# #trying to make a contrast matrix
# comparisons <- c()
# for (i in range(length(factor_values)))
# {
# comparisons <- append(comparisons, fv[i]'-'fv[i+1])
# }
# #attempt 2
# counter <- 2
# for (fv in factor_values)
# {
# comparisons <- append(comparisons, fv'-'fv[counter])
# counter <- counter + 1
# }
# #gene mapping with adf
# adf <- read.delim(file=r"(C:\Users\linde\OneDrive\Desktop\GLDS-6_metadata_A-AFFY-43.adf.txt)", header=FALSE, skip=74)
# probeset_names <- c(adf[1])
# ensembl_id_with_emptys <- adf[12]
# refseq <- c(adf[2])
# ensembl_id <- lapply(ensembl_id_with_emptys, function(x) x[x!=""])
# probeset_ensembl_refseq <- data.frame(probeset_names, ensembl_id_with_emptys, refseq)
# ens <- c(probeset_ensembl_refseq[,2])
# probeset <- c(probeset_ensembl_refseq[,1])
# rs <- c(probeset_ensembl_refseq[,3])
# prfromnorm <- rownames(norm_probeset_table)
# #probeset_and_ensembl <- data.frame(probeset_names, ensembl_id)
# c <- 1
# probeset_refseq_dict <- c()
# for (i in probeset)
# {
# probeset_refseq_dict[rs[c]]<-i
# c <- c + 1
# }
# rsdupsbool <- duplicated(rs)
# dupsindex<-c()
# counter <- 1
# for (bool in rsdups)
# {
# if (bool=="TRUE")
# {
# dupsindex <- append(dupsindex, counter)
# }
# counter <- counter + 1
# }
# duprefseq <- c()
# for (index in dupsindex)
# {
# duprefseq <- append(duprefseq, rs[index])
# }
# #rsdups <- duplicated(rs)
# uniquers <- unique(rs)
# probeset_refseq_dict <- c()
# #probeset_with_uniquerefseq[uniquers[probeset_with_refseq[ps]]]<-ps